## Title: Replication file for the main text of Somer-Topcu and Weitzel 
## Date: 2021-11-23
## Purpose: File replicates Table 2, Table 3, Figure 1 and Table 4 from the manuscript 
## Data used: df_monadic.csv and df_dyadic.csv 
## Note: In order to facilitate replication with other software the glmer models from the lme4 package follow 
##       the default specifications in Stata. We set nAGQ = 7.

## Load Libraries
library("rio")           ## data import
library("tidyverse")     ## data wrangling
library("lme4")          ## model estimation
library("texreg")        ## table export
library("sjPlot")        ## model visualization
library("ggpubr")        ## combining the plots

## Layout defaults
theme_update(text = element_text(size=20))

################################### 
## Load the data 
## two data frames are loaded. One for the monadic models and one for the dyadic ones.
df_monadic <- read_csv("df_cps_monadic.csv") 
df_dyadic  <- read_csv("df_cps_dyadic.csv")

################################### 
## Table 2
## The summary statistics come from the monadic and the dyadic data set. 
## Below we first reduce the monadic data set to the variables that are in the Table 3 models and then report the summary 
## statistics for that. The summary statistics for variables from the dyadic data set come from different subsets of the data.
## The appropriate sub-setting is done below in order to increase transparency. 

df_sumstats <- 
  df_monadic %>% 
  dplyr::select(votedummy, share_neg_disc_by_others_all, votet1_pollt1_diff, poll_campaign_start, pid_dummy_no, ycoun, 
                copartisan, nonpartisan, outpartisan, last_govt, polkno, age, gender, ccode, allstatements_all, ccode) %>% 
  drop_na()

# Party-level variables
# Received Attacks
summary(df_sumstats$share_neg_disc_by_others_all)
sd(df_sumstats$share_neg_disc_by_others_all, na.rm = TRUE)

# Party’s Attacks
df_sumstats_dyadic1 <- 
  df_dyadic %>% 
  dplyr::select(subject_cmp_code, other_cmp_code, votedummy_other, share_nov, share_nov_leader_all, share_nov_party_all, votet1_pollt1_diff_other, 
                other_poll_campaign_start, polkno, age, gender, other_last_govt, ccode, year) %>% 
  drop_na() %>% 
  dplyr::select(subject_cmp_code, other_cmp_code, ccode, year, share_nov) %>% 
  unique() 

summary(df_sumstats_dyadic1$share_nov)
sd(df_sumstats_dyadic1$share_nov, na.rm = TRUE)

# Others’ Attacks
df_sumstats_dyadic2 <- 
  df_dyadic%>% 
  dplyr::select(subject_cmp_code, other_cmp_code, votedummy_other, share_nov_others, share_nov_leader_all, share_nov_party_all, votet1_pollt1_diff_other, 
                other_poll_campaign_start, polkno, age, gender, other_last_govt, ccode, year) %>% 
  drop_na() %>% 
  dplyr::select(subject_cmp_code, other_cmp_code, ccode, year, share_nov_others) %>% 
  unique() 

summary(df_sumstats_dyadic2$share_nov_others)
sd(df_sumstats_dyadic2$share_nov_others, na.rm = TRUE)

# Party Performance
summary(df_sumstats$poll_campaign_start)
sd(df_sumstats$poll_campaign_start, na.rm = TRUE)
# Delta Party Performance
summary(df_sumstats$votet1_pollt1_diff)
sd(df_sumstats$votet1_pollt1_diff, na.rm = TRUE)
# Government t-1
summary(df_sumstats$last_govt)
sd(df_sumstats$last_govt, na.rm = TRUE)

# Individual-level variables
# Vote choice (DV)
summary(df_sumstats$votedummy)
sd(df_sumstats$votedummy,na.rm = TRUE)
# Political knowledge
summary(df_sumstats$polkno)
sd(df_sumstats$polkno)
# Age
summary(df_sumstats$age)
sd(df_sumstats$age)
# Male
summary(df_sumstats$gender)
sd(df_sumstats$gender)

## Cleaning up
rm(df_sumstats, df_sumstats_dyadic1, df_sumstats_dyadic2)

################################### 
## Table 3
## The code below replicates Table 3 in the manuscript. We first specify the model formulas, then estimate the model and use the texreg package to report the results.

## Model specification
model_t3a_monadic <- as.formula(votedummy ~ share_neg_disc_by_others_all + votet1_pollt1_diff + poll_campaign_start + as.factor(pid_dummy_no) + as.factor(last_govt) + polkno + age + as.factor(gender) + as.factor(ccode) + (1 | party_election))
model_t3b_monadic <- as.formula(votedummy ~ share_neg_disc_by_others_all + votet1_pollt1_diff + poll_campaign_start + as.factor(last_govt) + polkno + age + as.factor(gender) + as.factor(ccode) + (1 | party_election))
model_t3c_dyadic  <- as.formula(votedummy_other ~ share_nov_others + share_nov + votet1_pollt1_diff_other + other_poll_campaign_start + polkno + age + as.factor(gender) + as.factor(other_last_govt) + as.factor(ccode) + (1 | party_election))

## Model estimation
t3_c1 <- glmer(model_t3a_monadic, data = subset(df_monadic), nAGQ = 7, family = binomial(link = "logit"))
t3_c2 <- glmer(model_t3b_monadic, data = subset(df_monadic, pid_dummy_no == 1), nAGQ = 7, family = binomial(link = "logit"))
t3_c3 <- glmer(model_t3c_dyadic,  data = subset(df_dyadic, copartisan_subject == 1), nAGQ = 7, family = binomial(link = "logit"))
t3_c4 <- glmer(model_t3b_monadic, data = subset(df_monadic, nonpartisan == 1), nAGQ = 7, family = binomial(link = "logit"))

## Model reporting 
screenreg(list(t3_c1, t3_c2, t3_c3, t3_c4), 
          omit.coef = "ccode",
          stars = c(0.05),
          custom.note = "Note: Dependent variable is vote choice. Standard errors in parentheses. The models \n include party-election random intercepts and country fixed effects. \n %stars.",
          custom.coef.names = c("Intercept", "Received Attacks", "Delta Party Performance", "Party Performance",
                                "Copartisan", "Government t-1", "Political Knowledge", "Age", "Gender", "Others' attacks",
                                "Party's Attacks",  "Delta Party Performance", "Party Performance","Government t-1"),
          reorder.coef = c(2,11,10,3,4,5,6,7,8,9,1),
          custom.gof.names = c(NA, NA, NA, "Observations", "Groups", "Var: Groups (Intercept)"),
          custom.header = list("All" = 1, "Copartisans, target" = 2, "Copartisans, attacker" = 3, "Nonpartisans" = 4))

rm(model_t3a_monadic, model_t3b_monadic, model_t3c_dyadic,
   t3_c1, t3_c2, t3_c3, t3_c4)

################################### 
## Figure 1 
## Model specification
model_f1a_monadic <- as.formula(votedummy ~ share_neg_disc_by_others_all + votet1_pollt1_diff + poll_campaign_start + as.factor(pid_dummy_no) + as.factor(last_govt) + polkno + age + as.factor(gender) + as.factor(Country) + (1 | party_election))
model_f1b_monadic <- as.formula(votedummy ~ share_neg_disc_by_others_all + votet1_pollt1_diff + poll_campaign_start + as.factor(last_govt) + polkno + age + as.factor(gender) + as.factor(Country) + (1 | party_election))
model_f1c_dyadic  <- as.formula(votedummy_other ~ share_nov_others + share_nov + votet1_pollt1_diff_other + other_poll_campaign_start + polkno + age + as.factor(gender) + as.factor(other_last_govt) + as.factor(Country) + (1 | party_election))

## Model estimation
f1_m1_country <- glmer(model_f1b_monadic, data = subset(df_monadic, pid_dummy_no == 1), nAGQ = 7, family = binomial(link = "logit"))
f1_m2_country <- glmer(model_f1c_dyadic, data = subset(df_dyadic, copartisan_subject == 1), nAGQ = 7, family = binomial(link = "logit"))
f1_m3_country <- glmer(model_f1b_monadic, data = subset(df_monadic, nonpartisan == 1), nAGQ = 7, family = binomial(link = "logit"))

### Generating the four panels for Figure 1
### Figure 1 Panel 1 
fig1_p1 <-
  plot_model(f1_m1_country, 
             type = "pred", 
             terms = c("share_neg_disc_by_others_all [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]", "Country"),
             axis.title = c("Received Attacks" ,"Probability of vote for the targeted party"),
             title = "(1) Copartisans, targeted") + 
  theme_minimal()  +
  theme(axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none")

### Figure 1 Panel 2
fig1_p2 <-
  plot_model(f1_m2_country,  
             type = "pred", 
             terms = c("share_nov[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]", "Country"),
             axis.title = c("Received Attacks" ,""),
             title = "(2) Copartisans, attacker") + 
  theme_minimal()  +
  theme(axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none")

### Figure 1 Panel 3
fig1_p3 <-
  plot_model(f1_m2_country,  
             type = "pred", 
             terms = c("share_nov_others[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]", "Country"),
             axis.title = c("Received Attacks (others)" ,"Probability of vote for the targeted party"),
             title = "(3) Copartisans, third parties") +
  theme_minimal()  +
  theme(axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none")

### Figure 1 Panel 4
fig1_p4 <-
  plot_model(f1_m3_country,  
             type = "pred", 
             terms = c("share_neg_disc_by_others_all [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]", "Country"),
             axis.title = c("Received Attacks" ,""),
             title = "(4) Nonpartisans") + 
  theme_minimal()  +
  theme(axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20), legend.position = "none")

## Exporting the graph
ggarrange(fig1_p1, fig1_p2, 
          fig1_p3, fig1_p4, 
          ncol=2, nrow=2, common.legend = TRUE, legend="bottom")

## Cleaning up
rm(fig1_p1, fig1_p2, fig1_p3, fig1_p4,
   model_f1a_monadic, model_f1b_monadic, model_f1c_monadic,
   f1_m1_country, f1_m2_country, f1_m3_country)

################################### 
## Table 4
## We first generate a data frame that only includes observations that were in the models of Table 3 to report actual numbers from our analysis. 
## At the end, we also add a new variable that indicates whether a person was a voter or not. Afterwards the percentages for the individual rows are calculated. 
## The counts can be obtained by removing "/length(unique(df_table4$r_id))" from the last four statements. 

df_table4 <- 
  df_monadic %>% 
  dplyr::select(votedummy, share_neg_disc_by_others_all, votet1_pollt1_diff, poll_campaign_start, pid_dummy_no, last_govt, polkno, age, gender, ccode,ycoun, cmp_code, r_id,
                copartisan, nonpartisan, outpartisan, pid_dummy_no,party_election, allstatements_all ) %>% 
  drop_na() %>% 
  group_by(r_id) %>% 
  add_tally(votedummy)  

## Number of respondents that are in the analysis: 16,467
length(unique(df_table4$r_id))
## Partisans who voted for the party they identified with 
round(length(df_table4$r_id[df_table4$copartisan  == 1 & df_table4$votedummy == 1])/length(unique(df_table4$r_id)),2) 
## Respondents who identify with and voted for different parties
round(length(df_table4$r_id[df_table4$outpartisan == 1 & df_table4$votedummy == 1])/length(unique(df_table4$r_id)),2) 
## Respondents who did not identify with any party but voted 
round(length(df_table4$r_id[df_table4$nonpartisan == 1 & df_table4$votedummy == 1])/length(unique(df_table4$r_id)),2) 
## Respondents who did not vote for a party in the data set  
round(length(unique(df_table4$r_id[df_table4$n == 0]))/length(unique(df_table4$r_id)),2)

rm(df_table4, df_monadic, df_dyadic)
# fin